################################################################################################################
#                                                                                                              #
# The COMPUTE module and the 3 modules that follow must be run before SNP survival estimation can be completed #
#                                                                                                              #
################################################################################################################

 #The E_COMPUTE function creates the V matrix used to calculate survival
E_COMPUTE <- function(R)
 {
 PDFvalue <- dexp(R)
 V <- matrix(nrow=n,ncol=5,0)		#creates an empty matrix to be filled by survival values
 V[,1] <- 1-pexp(R)			#recursive formula for I(0,r)
 V[,2] <- R * PDFvalue + V[,1]		# I(1,r)
 V[,3] <- R^2 * PDFvalue + 2*V[,2]	# I(2,r)
 V[,4] <- R^3 * PDFvalue + 3*V[,3]	# I(3,r)
 V[,5] <- R^4 * PDFvalue + 4*V[,4]	# I(4,r)
 return(V)
 }
  
#E_module_0 computes the negative log likelihood for the k=0 case
E_module_0 <- function(omega)
 {
 X <- as.matrix(snp/omega[1])		
 
 
 f <- (omega[1]^-1 * exp(-X))			#SNP density function for k=0
 output <- E_COMPUTE(X) 
 S<- output[,1]
 neglike <- -delta*log(f) - (1-delta)*log(S)	#defines negative log likelihood
 SUM = sum(neglike)				#summation of negative log likelihood
 return(SUM)					#saves SUM values
 }
  
E_module_1 <- function(omega)
 {
 X <- as.matrix(snp/omega[1]) 
 a0 <- cos(omega[2])-sin(omega[2])
 a1 <- sin(omega[2])				#defines a0 and a1 as functions of phi
 f <- (a0+a1*X)^2*(omega[1])^-1 * exp(-X)		#SNP density function for k=1
 OUT <- E_COMPUTE(X)                 
 S <- a0**2 * OUT[,1] + a1**2 * OUT[,3] + 2*a0*a1*OUT[,2]    #survival function for k=1
 neglike <- -delta*log(f) - (1-delta)*log(S)        
 SUM = sum(neglike) 
 return(SUM)
 }
 
E_module_2 <- function(omega)
 {
 X <- as.matrix(snp/omega[1])  
 a0 <- (cos(omega[2])-sin(omega[2])*cos(omega[3])+sin(omega[2])*sin(omega[3]))
 a1 <- (sin(omega[2])*cos(omega[3])-2*sin(omega[2])*sin(omega[3]))
 a2 <- (.5*sin(omega[2])*sin(omega[3]))
 f <- ((omega[1]^-1)*(a0+a1*X+a2*X^2)^2 *exp(-X))
 output<- E_COMPUTE(X)
 S<- (a0**2*output[,1] + a1^2*output[,3] + a2^2*output[,5] + 2*a0*a1*output[,2] + 2*a0*a2*output[,3]  + 2*a1*a2*output[,4])
 neglike <- -delta*log(f) - (1-delta)*log(S)        
 SUM = sum(neglike) 
 return(SUM)
 }
 










set.seed(12345)

n <- 200        #sets the number of observations in our simulated data set
N <- 100 
 




# GENERATING SURVIVAL TIMES IN THE "All_Data" MATRIX

#################################################### EXPONENTIAL DATA
 #therate <- 2.5        
 #All_Survivals_vector <- rexp(N*n,therate)        

#################################################### WEIBULL DATA
 #theshape <- 4
 #thescale <- 2
 #All_Survivals_vector <- rweibull(N*n,theshape,thescale)    

#################################################### LOGNORMAL DATA
 All_Survivals_vector <-  rlnorm(N*n, meanlog = 0, sdlog = 1)







# GENERATING CENSORING TIMES IN THE "All_Deltas" MATRIX

#################################################### CENSORING TIMES WHEN DATA IS EXPONENTIAL 
 #All_Censorings_vector <- runif(N*n, min=0, max=2.)    # 20% right-censoring for exp data
# All_Censorings_vector <- runif(N*n, min=4, max=8.)    # No censoring for exp data

#################################################### CENSORING TIMES WHEN DATA IS WEIBULL  
 #All_Censorings_vector <- runif(N*n, min=0, max=9.05)    

#################################################### CENSORING TIMES WHEN DATA IS LOGNORMAL
 All_Censorings_vector <- runif(N*n, min=0, max=9.8)     






# GENERATING "All_Data" which is the vector of times (either survival or censoring times)
Nn <- N*n

A <- 1*(All_Censorings_vector >= All_Survivals_vector) * All_Survivals_vector

B <- 1*(All_Censorings_vector < All_Survivals_vector) * All_Censorings_vector

All_Times_vector <- A + B

junk1 <- cbind(All_Survivals_vector, All_Censorings_vector, All_Times_vector)
junk1[1:20,]










# GENERATING THE DELTA VECTOR 
All_Deltas_vector <- matrix(ncol=1, nrow=N*n,0)
Nn <- N*n

# Below is a new way of generating the vector of delta values that does NOT require a LOOP!!!
All_Deltas_vector <- 1*(All_Censorings_vector > All_Survivals_vector)

# CHECKING THE DELTA VECTOR TO SEE IF IT IS CORRECT
junk2 <- cbind(All_Survivals_vector, All_Censorings_vector, All_Deltas_vector)
junk2[1:20,]
mean(All_Deltas_vector)







# MAKING THE HUGE VECTOR OF TIMES INTO A MATIX AND
# MAKING THE HUGE VECTOR OF 0/1 VALUES INTO A MATRIX
All_Times <- matrix(All_Times_vector, ncol=n)
All_Deltas <- matrix(All_Deltas_vector  ,ncol=n)



# GRAPHING STUFF
max_value <- max(All_Survivals_vector)
graphing_xvalues <- seq(0.001,max_value+4,.01) #vector of times from 0.01 to our max value, in increments of .01








### BEFORE WE HAD THE 2 LINES BELOW TO GENERATE CENSORING TIMES BUT THIS IS WRONG!!!
### All_Deltas_vector <-rbinom(N*n,1,proportion) #creates delta vector of successes and failures
### All_Deltas <- matrix(All_Deltas_vector, ncol=n)

 







Final_Results <- matrix(nrow=N, ncol=5, 9999)

######################### STARTING THE BIG LOOP
######################### STARTING THE BIG LOOP
######################### STARTING THE BIG LOOP

for(j in 1:N)
{
snp <- All_Times[j,]
delta <- All_Deltas[j,]

newdata <- cbind(snp,delta)    #combines the snp and delta vectors to create a data set with censoring
newdataB <- subset(newdata,delta==1) #eliminates censored data

xbar <- mean(newdataB[,1]) #calculates the mean of the uncensored data

######optimizing k=0
x_values <- seq(xbar-.5*xbar,xbar+.5*xbar,xbar/4)
#x_values
x_length_K0 <- length(x_values)     
K0_results <- matrix(nrow=x_length_K0, ncol=4,0)    
row <- 0                
for (x in x_values)            
{
row <- row+1                
#print(x)
out <- optim( x , E_module_0)
#print(out$par)    
K0_results[row,1] <- out$par        # storing out$par in the correct row and 1st 2 columns of the "K1_results" matrix
}
#K0_results



#################### computing HQ for k=0
for (i in 1:x_length_K0)         
{
omega <- K0_results[i,1]        
fopt <- E_module_0(omega)         # computing "fopt" needed for HQ
HQ <-  2*fopt + 2*1*log(log(n))        # computing HQ
K0_results[i,4]    <- HQ            # put HQ in the 4th column of the i^th row of the "K1_results" matrix
}                # printing the "K1_results" matrix again - notice that the 4th column is filled in now
#choosing the best estimates
omega_K0 <- c(   K0_results[which.min(K0_results[,4]),]  , 0)



######optimizing k=1
phi_values <- seq(-1.5,1.5,.1)
#phi_values
phi_length_K1 <- length(phi_values)    # setting up a matrix to store the K=1 omega values
K1_results <- matrix(nrow=phi_length_K1, ncol=4,0)    
row <- 0                # setting the initial value of "row" to zero
for (phi in phi_values)            # this loop which goes from 1 to 31 and computes estimates for each set of starting values
{
row <- row+1                # increasing the "row" value by one each time the loop runs
#print(phi)
a0 <- cos(phi)-sin(phi)
a1 <- sin(phi)
EZ <- a0^2 + 6*a1^2 + 4*a0*a1
r <- xbar/EZ
out <- optim(c(r,phi), E_module_1)
#print(out$par)    
K1_results[row,1:2] <- out$par        # storing out$par in the correct row and 1st 2 columns of the "K1_results" matrix
}
#K1_results                # printing the "K1_results" matrix which contains all omega estimates
#################### computing HQ for k=1
for (i in 1:phi_length_K1)        # starting a loop that goes froom 1 to 31
{
omega <- K1_results[i,1:2]        # this says that omega is equal to the first 2 columns of the i^th row of the "K1_results" matrix
fopt <- E_module_1(omega)         # computing "fopt" needed for HQ
HQ <-  2*fopt + 2*2*log(log(n))        # computing HQ
K1_results[i,4]    <- HQ            # put HQ in the 4th column of the i^th row of the "K1_results" matrix
}
#K1_results                # printing the "K1_results" matrix again - notice that the 4th column is filled in now
#choosing the best estimates
omega_K1 <- c(   K1_results[which.min(K1_results[,4]),]  , 1)




######optimizing k=2
phi1_values <- seq(-1.5,1.5,1)
phi2_values <- seq(-1.5,1.5,1)
phi_length_K2 <- (length(phi1_values)*length(phi2_values))
K2_results <- matrix(nrow=phi_length_K2, ncol=4,0)
row <- 0
for (phi1 in phi1_values)
 {
 for (phi2 in phi2_values)
 {
row <- row+1
 a0 <- (cos(phi1)-sin(phi1)*cos(phi2)+sin(phi1)*sin(phi2))
 a1 <- (sin(phi1)*cos(phi2)-2*sin(phi1)*sin(phi2))
 a2 <- (.5*sin(phi1)*sin(phi2))
 EZ <- a0**2 + 6*a1**2 + 120*a2**2 + 4*a0*a1 + 12*a0*a2 + 48*a1*a2
 r <- xbar/EZ
 out <- optim(c(r,phi1,phi2), E_module_2)
 #print(out$par)
K2_results[row,1:3] <- out$par
 }
 }
#################### computing HQ for k=2
for(i in 1:phi_length_K2)
 {
 omega <- K2_results[i,1:3]
 fopt <- E_module_2(omega)
 HQ <- 2*fopt + 2*3*log(log(n))
 K2_results[i,4] <- HQ
 }
#K2_results
#choosing best estimate for k=2
omega_K2 <- c(    K2_results[which.min(K2_results[,4]),]   , 2)



# combining the best K=0, K=1, and K=2 estimates into one matrix
all3 <- rbind(omega_K0, omega_K1, omega_K2)
final_omega <- all3[ which.min (all3[,4]), ]
Final_Results[j, ] <- final_omega
} 
######################### ENDING THE BIG LOOP
######################### ENDING THE BIG LOOP
######################### ENDING THE BIG LOOP



Final_Results








######### PLOTTING CODE
plot.new()
par(mai=c(1,1,1,1))
box()
#####################plot.window(xlim= c(0, max_value-1.5), ylim=c(0,1), xaxs="i", yaxs="i")
 #plot.window(xlim= c(0, 2.5), ylim=c(0,1), xaxs="i", yaxs="i")            #EXPONENTIAL
 plot.window(xlim= c(0, 6), ylim=c(0,1), xaxs="i", yaxs="i")            #LOGNORMAL
axis(1)
axis(2)
title(main="SNP and KM Estimates", xlab="time", ylab="survival probability")




# DEFINING THE 5 TIMES OF INTEREST
quantiles <- c(.1,.3,.5,.7,.9)
 #times <- qexp(quantiles,rate=therate)                #TIMES OF INTEREST FOR EXP DATA
 #times <- qweibull(quantiles,shape=theshape,scale=thescale)    #TIMES OF INTEREST FOR WEIB DATA
 times <- qlnorm(quantiles, meanlog = 0, sdlog = 1)        #TIMES OF INTEREST FOR LOGNORMAL DATA
# note that 'qnorm(.975)' results in 1.96
times
 




Selected_KM_survivals <- matrix(nrow=N,ncol=5)
for (j in 1:N)
{
# KM code
snp <- All_Times[j,]
delta <- All_Deltas[j,]

library(survival)                    #loads survival package needed to graph KM
fit <- survfit(Surv(snp, delta) ~ 1)                 #calculates survival values based on KM
lines(fit, conf.int=FALSE,col="grey60")    #plots KM
KMout <- approx(fit$time, fit$surv, times, method="constant" , f=0, yleft=1, yright=0)
Selected_KM_survivals[j,] <- KMout$y
}
Selected_KM_survivals
 





# SETTING UP MATRICES NEEDED FOR GRAPHING LOOP
Xlength <- length(graphing_xvalues)
All_SNP_survivals <- matrix(nrow=N, ncol=Xlength)
Selected_SNP_survivals <- matrix(nrow=N,ncol=5)
for(j in 1:N)
{
X <- graphing_xvalues/Final_Results[j,1]   
X2 <- times/Final_Results[j,1]

if (Final_Results[j,5] == 0)
    {
    S <- 1-pexp(X)
    lines(graphing_xvalues,S,col="blue")
        
    All_SNP_survivals[j,] <- S    # fills in SNP survivals at all of the many times
    
    Selected_SNP_survivals[j,]  <- 1-pexp(X2)    # fills in SNP survivals for 5 times
    }

else if (Final_Results[j,5] == 1)
    {
        PDFvalue <- dexp(X)
    m <- length(X)
    V <- matrix(nrow=m,ncol=3,0)        #creates a smaller V matrix
    V[,1] <- 1-pexp(X)
    V[,2] <- X * PDFvalue + V[,1]
    V[,3] <- X^2*PDFvalue + 2*V[,2]
    a0 <- cos( Final_Results[j,2] )-sin( Final_Results[j,2] )
    a1 <- sin( Final_Results[j,2] )         
    S <- a0 **2 * V[,1] + a1 **2 * V[,3] + 2*a0*a1*V[,2] #from survival equation
    lines(graphing_xvalues,S,col="green")
    
    All_SNP_survivals[j,] <- S    # fills in SNP survivals at all of the many times

    # UPDATE WHAT GOES HERE
    PDFvalue2 <- dexp(X2)
    m2 <-length(X2)
    V2 <- matrix(nrow=m2,ncol=3,0)
    
    V2[,1] <- 1-pexp(X2)
    
    V2[,2] <- X2 * PDFvalue2 + V2[,1]
    
    V2[,3] <- (X2)^2*PDFvalue2 + 2*V2[,2]
    ###a02 <- cos( Final_Results[j,2] )-sin( Final_Results[j,2] )
    ###a12 <-sin( Final_Results[j,2] ) 
    S2 <- a0 **2 * V2[,1] + a1 **2 * V2[,3] + 2*a0*a1*V2[,2]
    Selected_SNP_survivals[j,]  <- S2        # fills in SNP survivals for 5 times
         }

else  
    {
    PDFvalue <- dexp(X)
    m <- length(X)
    V <- matrix(nrow=m,ncol=5,0)
    V[,1] <- 1-pexp(X)            #recursive formula for I(0,r)
     V[,2] <- X * PDFvalue + V[,1]        # I(1,r)
    V[,3] <- X^2 * PDFvalue + 2*V[,2]    # I(2,r)
    V[,4] <- X^3 * PDFvalue + 3*V[,3]    # I(3,r)
    V[,5] <- X^4 * PDFvalue + 4*V[,4]    # I(4,r)
    a0 <- (cos( Final_Results[j,2] )-sin( Final_Results[j,2] )*cos( Final_Results[j,3] )+sin( Final_Results[j,2] )*sin( Final_Results[j,3] ))
    a1 <- (sin( Final_Results[j,2] )*cos( Final_Results[j,3] )-2*sin( Final_Results[j,2] )*sin(Final_Results[j,3] ))
    a2 <- (.5*sin( Final_Results[j,2] )*sin( Final_Results[j,3] ))
    S <- (a0**2*V[,1] + a1^2*V[,3] + a2^2*V[,5] + 2*a0*a1*V[,2] + 2*a0*a2*V[,3] + 2*a1*a2*V[,4])
         lines(graphing_xvalues,S,col="red")

    All_SNP_survivals[j,] <- S    # fills in SNP survivals at all of the many times

    # UPDATE WHAT GOES HERE

    
    PDFvalue2 <- dexp(X2)
    m2 <- length(X2)
    V2 <- matrix(nrow=m2,ncol=5,0)
    V2[,1] <- 1-pexp(X2)            #recursive formula for I(0,r)
     V2[,2] <- X2 * PDFvalue2 + V2[,1]        # I(1,r)
    V2[,3] <- (X2)^2 * PDFvalue2 + 2*V2[,2]    # I(2,r)
    V2[,4] <- (X2)^3 * PDFvalue2 + 3*V2[,3]    # I(3,r)
    V2[,5] <- (X2)^4 * PDFvalue2 + 4*V2[,4]    # I(4,r)
    ###a02 <- (cos( Final_Results[j,2] )-sin( Final_Results[j,2] )*cos( Final_Results[j,3] )+sin( Final_Results[j,2] )*sin( Final_Results[j,3] ))
    ###a12 <- (sin( Final_Results[j,2] )*cos( Final_Results[j,3] )-2*sin( Final_Results[j,2] )*sin(Final_Results[j,3] ))
    ###a22 <- (.5*sin( Final_Results[j,2] )*sin( Final_Results[j,3] ))
    S2 <- (a0**2*V2[,1] + a1^2*V2[,3] + a2^2*V2[,5] + 2*a0*a1*V2[,2] + 2*a0*a2*V2[,3] + 2*a1*a2*V2[,4])

    Selected_SNP_survivals[j,]  <- S2        # fills in SNP survivals for 5 times     
    }
}

 
Selected_SNP_survivals






# YOU WILL KNOW IF YOU GET THE RIGHT THING AFTER PRINTING "Avg_at_5times"
Avg_SNPsurv_at_5times <- colMeans(Selected_SNP_survivals,na.rm=FALSE)
Avg_SNPsurv_at_5times
Avg_KMsurv_at_5times <- colMeans(Selected_KM_survivals,na.rm=FALSE)
Avg_KMsurv_at_5times
true_surv <- c(.9,.7,.5,.3,.1)
SNPbias <- (Avg_SNPsurv_at_5times - true_surv)
SNPbias
KMbias <- (Avg_KMsurv_at_5times - true_surv)
KMbias
SNPvariance <- apply(Selected_SNP_survivals, 2, var)
SNPvariance  
KMvariance <- apply(Selected_KM_survivals, 2, var)
KMvariance 
SNP_MSE <- SNPvariance + SNPbias^2
SNP_MSE
KM_MSE <- KMvariance + KMbias^2
KM_MSE

RE <- KM_MSE/SNP_MSE
RE




# graphing SNP average
SNP_average <- colMeans(All_SNP_survivals,na.rm=FALSE)
lines(graphing_xvalues,SNP_average,lwd=8,col="orange")





# graphing the true survival
 #truth <- exp(-therate*graphing_xvalues)        # EXPONENTIAL TRUE SURVIVAL
 #truth <- exp(- (graphing_xvalues/thescale)^theshape)     # WEIBULL TRUE SURVIVAL
 truth <- 1-plnorm(graphing_xvalues, meanlog = 0, sdlog = 1, lower.tail = TRUE, log.p = FALSE)   # LOGNORMAL TRUE SURVIVAL
lines(graphing_xvalues,truth,lwd=6,col="black")